###################################################
library(scales)

##################################################################################################
# Refugees 

# Use Model 1 from the code "Income Prediction Exercise.R" :

# Expose the Model 1 predicted rank under the column name used by the rest of
# this script.
dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent_predictedlog <-
  dfcohort$PredictedLogIncome_pct_model1

# Replace selected DESTINATION_CMA codes with city names and, for each city,
# add a logical indicator column named after the city. Codes and indicators
# are processed one city at a time, in the order listed here.
cma_city_by_code <- c(
  "35535" = "Toronto",
  "24462" = "Montreal",
  "59933" = "Vancouver",
  "48825" = "Calgary",
  "48835" = "Edmonton"
)
for (cma_code in names(cma_city_by_code)) {
  cma_city <- cma_city_by_code[[cma_code]]
  # which() skips rows where DESTINATION_CMA is NA, so those stay NA.
  dfcohort$DESTINATION_CMA[which(dfcohort$DESTINATION_CMA == cma_code)] <- cma_city
  dfcohort[[cma_city]] <- dfcohort$DESTINATION_CMA == cma_city
}

# Bin scatter: assign each parent rank to one of 20 five-point-wide bins
# (ventiles). The columns keep their historical "decile" names because the
# rest of the script refers to them by those names.
# cut() has no `labs` argument (it would be swallowed by `...` and ignored), so
# request the integer bin codes explicitly with labels = FALSE. Bins are
# (0,5], (5,10], ..., (95,100], coded 1..20; values outside (0,100] become NA.
# NOTE(review): a rank of exactly 0 falls outside the first bin and becomes NA;
# add include.lowest = TRUE if such values can occur and should be kept.
ventile_breaks <- seq(from = 0, to = 100, by = 5)
dfcohort$hhinc_decile <- as.numeric(cut(
  dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent,
  breaks = ventile_breaks,
  labels = FALSE
))
dfcohort$hhinc_decile_predict <- as.numeric(cut(
  dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent_predictedlog,
  breaks = ventile_breaks,
  labels = FALSE
))



# Figure margins (bottom, left, top, right), in lines of text.
par(mar = c(4.5, 4.7, 3, 2.5))

# 2 x 2 grid of panels, filled row by row (1 2 / 3 4).
m <- matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE)

# `heights` takes one relative height per layout row. The matrix has two rows,
# so supply exactly two (equal) heights instead of a stray third entry.
layout(mat = m, heights = c(0.4, 0.4))


# TODO (carried over from the original script): fix the histogram y axes.

# Draw one bin-scatter panel on the current layout cell.
#
# A histogram of `parent_pct`, rescaled so the bar heights sum to 1, is drawn
# first with its own axis on the right-hand side; the scatter of `child_rank`
# against `bin` is then overlaid on the same cell with its own axes.
#
# parent_pct  numeric vector of parent ranks for the histogram
# bin         x values of the scatter (bin codes)
# child_rank  y values of the scatter (mean child rank per bin)
# xlab, ylab, main  labels of the overlaid scatter plot
#
# Returns the rescaled histogram object invisibly.
plot_bin_scatter_panel <- function(parent_pct, bin, child_rank,
                                   xlab, ylab, main) {
  # The last tick (0.14) lies above ylim and is therefore not drawn; the vector
  # is kept as-is so the figure is unchanged.
  share_ticks <- c(0, 0.02, 0.04, 0.06, 0.08, 0.10, 0.12, 0.14)

  shares <- hist(parent_pct, plot = FALSE)
  # Replace the counts by shares so the bars sum to 1.
  shares$counts <- shares$density / sum(shares$density)
  plot(shares, axes = FALSE, xlab = "", ylab = "", main = "",
       col = alpha("grey95", 0.5), ylim = c(0, 0.12))
  axis(side = 4, at = share_ticks, labels = share_ticks)

  # Overlay the scatter on the same layout cell instead of advancing to the
  # next one.
  par(new = TRUE)
  plot(bin, child_rank, xlab = xlab, ylab = ylab, main = main,
       col = "black", pch = 16, cex = 1.3, ylim = c(40, 75))

  invisible(shares)
}

# Row masks used to pick the histogram inputs (NA categories stay NA and are
# dropped by hist()).
refugee_rows <- dfcohort$ImmigrationCategory == "Refugee"
other_rows <- dfcohort$ImmigrationCategory != "Refugee"

# NOTE(review): the refugee tables name the cell-size column `count`, the
# non-refugee tables name it `counts`. Kept as-is because the CSVs written
# below may be read elsewhere; harmonise once consumers are checked.

## Panel 1: refugees, realized parent rank bins
dfrefug_actual <- dfcohort %>%
  filter(ImmigrationCategory == "Refugee") %>%
  group_by(hhinc_decile) %>%
  summarize(Child_exp_rank = mean(Child_Income_IND_30_34_pct, na.rm = TRUE),
            count = n())
plot_bin_scatter_panel(
  dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent[refugee_rows],
  dfrefug_actual$hhinc_decile,
  dfrefug_actual$Child_exp_rank,
  xlab = "Realized Parent Ventile", ylab = "Child Rank", main = "Refugees"
)
write.csv(dfrefug_actual, "H:/Zheng_10223/ToVet/Supporting/refugactualcounts.csv")

## Panel 2: refugees, predicted parent rank bins
dfrefug_predict <- dfcohort %>%
  filter(ImmigrationCategory == "Refugee") %>%
  group_by(hhinc_decile_predict) %>%
  summarize(Child_exp_rank = mean(Child_Income_IND_30_34_pct, na.rm = TRUE),
            count = n())
plot_bin_scatter_panel(
  dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent_predictedlog[refugee_rows],
  dfrefug_predict$hhinc_decile_predict,
  dfrefug_predict$Child_exp_rank,
  xlab = "Predicted Parent Ventile", ylab = "",
  main = "Refugees \n Underplacement Adjusted"
)
write.csv(dfrefug_predict, "H:/Zheng_10223/ToVet/Supporting/refugpredictcounts.csv")

## Panel 3: non-refugees, realized parent rank bins
dfnonrefug_actual <- dfcohort %>%
  filter(ImmigrationCategory != "Refugee") %>%
  group_by(hhinc_decile) %>%
  summarize(Child_exp_rank = mean(Child_Income_IND_30_34_pct, na.rm = TRUE),
            counts = n())
plot_bin_scatter_panel(
  dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent[other_rows],
  dfnonrefug_actual$hhinc_decile,
  dfnonrefug_actual$Child_exp_rank,
  xlab = "Realized Parent Ventile", ylab = "Child Rank", main = "Non-Refugees"
)
write.csv(dfnonrefug_actual, "H:/Zheng_10223/ToVet/Supporting/nonrefugactualcounts.csv")

## Panel 4: non-refugees, predicted parent rank bins
dfnonrefug_predict <- dfcohort %>%
  filter(ImmigrationCategory != "Refugee") %>%
  group_by(hhinc_decile_predict) %>%
  summarize(Child_exp_rank = mean(Child_Income_IND_30_34_pct, na.rm = TRUE),
            counts = n())
plot_bin_scatter_panel(
  dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent_predictedlog[other_rows],
  dfnonrefug_predict$hhinc_decile_predict,
  dfnonrefug_predict$Child_exp_rank,
  xlab = "Predicted Parent Ventile", ylab = "",
  main = "Non-Refugees \n Underplacement Adjusted"
)
write.csv(dfnonrefug_predict, "H:/Zheng_10223/ToVet/Supporting/nonrefugpredictcounts.csv")



# predicted coef plots

#### FIGURE 2: COEF PLOT (absolute upward mobility) ####
## INDIVIDUAL
# For refugees and for everyone else, regress the child rank on the predicted
# parent rank within each subgroup and predict the child rank (with a
# confidence interval) at a predicted parent rank of 25.

# Fit the rank-rank regression on the full `cohort` and on each subgroup.
#
# cohort        data frame holding the regression variables and the subgroup
#               columns used below
# overall_name  name given to the model fitted on all rows of `cohort`
#
# Returns a named list of lm fits. The order of the list is the row order of
# the prediction tables written to disk, so do not reorder it.
fit_igm_models <- function(cohort, overall_name) {
  igm_formula <- Child_Income_IND_30_34_pct ~
    MainParent_Income_HH_MainParentAge45_49_pctparent_predictedlog

  subgroup_rows <- c(
    setNames(list(rep(TRUE, nrow(cohort))), overall_name),
    list(
      # Child gender
      IGM_male = cohort$Gender_Child == 1,
      IGM_female = cohort$Gender_Child == 2,
      # Landing age
      IGM_age0_4 = cohort$LANDING_AGE %in% c(0, 1, 2, 3, 4),
      IGM_age5_9 = cohort$LANDING_AGE %in% c(5, 6, 7, 8, 9),
      IGM_age10_14 = cohort$LANDING_AGE %in% c(10, 11, 12, 13, 14),
      IGM_age15_17 = cohort$LANDING_AGE %in% c(15, 16, 17),
      # Language
      IGM_noenglish = cohort$AnyEnglish_Main == 0,
      IGM_english = cohort$AnyEnglish_Main == 1,
      # Intended occupation
      IGM_nonworker = cohort$IntendedOccupation_Main == "Non-Workers",
      IGM_newworker = cohort$IntendedOccupation_Main == "New Workers",
      IGM_clerical = cohort$IntendedOccupation_Main == "Clerical and Laborers",
      IGM_skilled = cohort$IntendedOccupation_Main == "Skilled and Technical",
      IGM_manager = cohort$IntendedOccupation_Main == "Managerial/Professional",
      # World area of birth
      IGM_europe = cohort$WORLD_AREA_BIRTH == "Europe",
      IGM_southamerica = cohort$WORLD_AREA_BIRTH == "South and Central America",
      IGM_oceania = cohort$WORLD_AREA_BIRTH == "Oceania and other Asia",
      IGM_africa = cohort$WORLD_AREA_BIRTH == "Africa and Middle East",
      IGM_southasia = cohort$WORLD_AREA_BIRTH == "Southern Asia",
      IGM_easternasia = cohort$WORLD_AREA_BIRTH == "Eastern Asia",
      # Enclave
      IGM_noenclave = cohort$enclave == 0,
      IGM_enclave = cohort$enclave == 1,
      # Destination CMA
      IGM_calgary = cohort$DESTINATION_CMA == "Calgary",
      IGM_edmonton = cohort$DESTINATION_CMA == "Edmonton",
      IGM_vancouver = cohort$DESTINATION_CMA == "Vancouver",
      IGM_toronto = cohort$DESTINATION_CMA == "Toronto",
      IGM_montreal = cohort$DESTINATION_CMA == "Montreal"
    )
  )

  # which() drops rows whose subgroup membership is NA.
  lapply(subgroup_rows, function(keep) {
    lm(igm_formula, data = cohort[which(keep), ])
  })
}

# Predict the child rank at a predicted parent rank of 25 for every model.
#
# models  list of lm fits as returned by fit_igm_models()
#
# Returns a matrix with one row per model (in list order) and the columns
# produced by predict(..., interval = "confidence"): fit, lwr, upr.
predict_at_p25 <- function(models) {
  p25 <- data.frame(
    MainParent_Income_HH_MainParentAge45_49_pctparent_predictedlog = 25
  )
  # unname() so the stacked rows keep the row name that predict() gives each
  # single prediction; the written CSVs then match the previous layout exactly.
  do.call(
    rbind,
    lapply(unname(models), predict, newdata = p25, interval = "confidence")
  )
}

# Refugees
refugee <- dfcohort[which(dfcohort$ImmigrationCategory == "Refugee"), ]
refugee_models <- fit_igm_models(refugee, "IGM_refugee")
# Keep the individual IGM_* model objects available under their usual names.
invisible(list2env(refugee_models, envir = environment()))
output <- predict_at_p25(refugee_models)

# Everyone else. The subgroup IGM_* objects are overwritten by these fits;
# only IGM_refugee survives from the refugee pass.
others <- dfcohort[which(dfcohort$ImmigrationCategory != "Refugee"), ]
other_models <- fit_igm_models(others, "IGM_others")
invisible(list2env(other_models, envir = environment()))
output_others <- predict_at_p25(other_models)

write.csv(output, "H:/Zheng_10223/ToVet/Output/refugeepredictactp25.csv")
write.csv(output_others, "H:/Zheng_10223/ToVet/Output/nonrefugeepredictactp25.csv")

#
# COUNTS: FOR ALL THREE COEF PLOTS

# Keep only rows with a non-missing predicted parent rank. This overwrites
# dfcohort, so everything below works on the restricted sample.
dfcohort <- dfcohort[
  !is.na(dfcohort$MainParent_Income_HH_MainParentAge45_49_pctparent_predictedlog),
]

# Landing Age:
# Row masks for the two immigration groups and the four landing-age bands.
is_refugee_row <- dfcohort$ImmigrationCategory == "Refugee"
is_other_row <- dfcohort$ImmigrationCategory != "Refugee"
landed_0_4 <- dfcohort$LANDING_AGE %in% 0:4
landed_5_9 <- dfcohort$LANDING_AGE %in% 5:9
landed_10_14 <- dfcohort$LANDING_AGE %in% 10:14
landed_15_17 <- dfcohort$LANDING_AGE %in% 15:17

# Each line evaluates to one cell count; na.rm = TRUE leaves out rows whose
# immigration category is NA.
# Refugees
sum(is_refugee_row & landed_0_4, na.rm = TRUE)
sum(is_refugee_row & landed_5_9, na.rm = TRUE)
sum(is_refugee_row & landed_10_14, na.rm = TRUE)
sum(is_refugee_row & landed_15_17, na.rm = TRUE)

# Non-Refugees
sum(is_other_row & landed_0_4, na.rm = TRUE)
sum(is_other_row & landed_5_9, na.rm = TRUE)
sum(is_other_row & landed_10_14, na.rm = TRUE)
sum(is_other_row & landed_15_17, na.rm = TRUE)


# Cell counts by `refugeein` crossed with each subgroup variable; each table
# (except the gender one) is written to the Supporting folder.

# CMA: count every destination, then keep the five named cities.
major_cmas <- c("Edmonton", "Toronto", "Calgary", "Vancouver", "Montreal")
dfcma <- dfcohort %>%
  group_by(refugeein, DESTINATION_CMA) %>%
  summarize(count = n())
dfcma <- dfcma[dfcma$DESTINATION_CMA %in% major_cmas, ]
write.csv(dfcma, "H:/Zheng_10223/ToVet/Supporting/coefcma.csv")

# World Area
dfarea <- dfcohort %>%
  group_by(refugeein, WORLD_AREA_BIRTH) %>%
  summarize(count = n())
write.csv(dfarea, "H:/Zheng_10223/ToVet/Supporting/coefarea.csv")

# Enclave
dfenclave <- dfcohort %>%
  group_by(refugeein, enclave) %>%
  summarize(count = n())
write.csv(dfenclave, "H:/Zheng_10223/ToVet/Supporting/coefenclave.csv")

# Language
dflang <- dfcohort %>%
  group_by(refugeein, AnyEnglish_Main) %>%
  summarize(count = n())
write.csv(dflang, "H:/Zheng_10223/ToVet/Supporting/coeflang.csv")

# Intended occ:
dfocc <- dfcohort %>%
  group_by(refugeein, IntendedOccupation_Main) %>%
  summarize(count = n())
write.csv(dfocc, "H:/Zheng_10223/ToVet/Supporting/coefocc.csv")

# Gender
# NOTE(review): unlike the tables above, dfgender is not written to disk in
# this part of the script -- confirm whether an export is missing.
dfgender <- dfcohort %>%
  group_by(Gender_Child, refugeein) %>%
  summarize(count = n())


